# -*- coding:utf-8 -*-

import requests
from lxml import etree
from fake_useragent import UserAgent
from http import cookiejar
import json
from datetime import datetime
import time
import os

requests.packages.urllib3.disable_warnings()  # Silence urllib3 warnings (e.g. the insecure HTTPS request warning)

"""
今日头条
https://www.toutiao.com/search/?keyword=%E5%8F%8C11
"""

class TouTiao():
    """Minimal client for the Toutiao web search API.

    Fetches search results for the keyword "996" one page at a time and
    prints a short summary of every article found.
    """

    def __init__(self):
        """Create the cookie jar and the request headers sent with each call."""
        # CookieJar instance meant for storing cookies. It is created here but
        # no method of this class reads it.
        self.cookie = cookiejar.CookieJar()
        ua = UserAgent(use_cache_server=False)  # disable the cache server
        self.headers = {
            "user-agent": ua.random,
            # NOTE(review): the next four entries look like HTTP/2
            # pseudo-headers copied from browser devtools; they are sent as
            # ordinary headers here - confirm whether they are needed.
            "authority": "www.toutiao.com",
            "method": "GET",
            "path": "/api/search/content/?aid=24&app_name=web_search&offset=0&format=json&keyword=996&autoload=true&count=20&en_qc=1&cur_tab=1&from=search_tab&pd=synthesis&timestamp=1575268500312",
            "scheme": "https",
            "accept": "application/json, text/javascript",
            "accept-encoding": "gzip, deflate, br",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "content-type": "application/x-www-form-urlencoded",
            "dnt": "1",
            "pragma": "no-cache",
            "referer": "https://www.toutiao.com/search/?keyword=996",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "x-requested-with": "XMLHttpRequest",
            # NOTE(review): hard-coded cookie, presumably captured from a
            # browser session - it may expire and should not live in source.
            "cookie":"csrftoken=35722d3c36efb7dc6fef1e5a61831667; tt_webid=6765692035614934536; s_v_web_id=8e1dff2e9e540799a34035d5dde2de42; __tasessionId=ixixymsrg1575268499693"
        }

    def get_comments(self, offset):
        """Fetch one page of search results and print a summary per article.

        For every result that carries an ``article_url`` this prints a
        separator line, the article URL (only when the article has at least
        500 comments), the title and the publish time.

        Args:
            offset: Index of the first result to fetch. Each page holds 20
                results, so callers advance it in steps of 20.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with an HTTP error status.
            ValueError: If the response body is not valid JSON.
        """
        query = {
            "aid": 24,
            "app_name": "web_search",
            "offset": offset,  # grows by 20 for each page
            "format": "json",
            "keyword": "996",
            "autoload": "true",
            "count": 20,
            "en_qc": 1,
            "cur_tab": 1,
            "from": "search_tab",
            "pd": "synthesis",
            "timestamp": int(time.time()*1000)
        }
        url = "https://www.toutiao.com/api/search/content/"
        # Use requests directly: the HttpUtils helper this method used to call
        # is neither defined nor imported in this module. The timeout keeps a
        # stalled connection from blocking the crawl forever.
        response = requests.get(url, headers=self.headers, params=query,
                                timeout=10)
        response.raise_for_status()
        payload = json.loads(response.text)
        for article in self._extract_articles(payload):
            print("===================")
            if article['comments_count'] >= 500:
                print(article['article_url'])
            print(article['title'])
            print(article['datetime'])

    @staticmethod
    def _extract_articles(payload):
        """Return the article entries found in a decoded search response.

        Entries without an ``article_url`` are skipped. Optional fields that
        are absent become ``None``; a missing or empty ``comments_count``
        becomes ``0`` so it can be compared numerically.

        Args:
            payload: The decoded JSON response body.

        Returns:
            A list of dicts with the keys ``article_url``, ``abstract``,
            ``comments_count``, ``datetime``, ``media_url``, ``source`` and
            ``title``. Empty when ``payload`` is not a dict or its ``data``
            field is missing or null.
        """
        if not isinstance(payload, dict):
            return []
        articles = []
        for item in payload.get('data') or []:
            if not isinstance(item, dict) or item.get('article_url') is None:
                continue
            articles.append({
                'article_url': item['article_url'],            # link to the article
                'abstract': item.get('abstract'),              # article summary
                'comments_count': item.get('comments_count') or 0,  # number of comments
                'datetime': item.get('datetime'),              # publish time
                'media_url': item.get('media_url'),            # link to the publisher
                'source': item.get('source'),                  # source name
                'title': item.get('title'),                    # headline
            })
        return articles


if __name__ == '__main__':
    # Crawl nine consecutive result pages (offsets 0, 20, ..., 160),
    # pausing three seconds after each page.
    PAGE_SIZE = 20
    PAGE_COUNT = 9
    crawler = TouTiao()
    for page_index in range(PAGE_COUNT):
        offset = page_index * PAGE_SIZE
        crawler.get_comments(offset)
        print(f"======={offset}=========")
        time.sleep(3)